# ============================================================
# Thomas König & Stefan Eschenwecker
# The European Court of Justice and legal European integration
# 
# This script shows the data preparation.
# ============================================================

# load libraries
library(tidyverse)

# load data
# (the .RData files are restored with load(), which creates their objects
# directly in the workspace; the object names are fixed by the files)

# Schroeder 2024 replication data
# (available from: https://www.tandfonline.com/doi/full/10.1080/13501763.2023.2208165#d1e151)
load("Court/Data/Schroeder_2024_ReplicationData.RData")
# CJEU database
# (available at: https://www.iuropa.pol.gu.se/)
load("Court/Data/IUROPA-CJEU-Database-issues.RData")
load("Court/Data/IUROPA-CJEU-Database-positions.RData")
load("Court/Data/IUROPA-CJEU-Database-Platform/IUROPA-CJEU-Database-Platform-decisions.RData")
load("Court/Data/IUROPA-CJEU-Database-Platform/IUROPA-CJEU-Database-Platform-judgments.RData")
# Banzhaf country weights
CountryWeights95 <- read.csv("Court/Data/CountryWeights1995.csv")
CountryWeights04 <- read.csv("Court/Data/CountryWeights2004.csv")
CountryWeights07 <- read.csv("Court/Data/CountryWeights2007.csv")

# fail early if the files did not provide the data frames used below
# (without this check a missing `df` would silently resolve to the function
# stats::df and only fail later with an unrelated error message)
RequiredData <- c("df", "issues", "positions", "decisions", "judgments")
DataLoaded <- vapply(
  mget(RequiredData, ifnotfound = list(NULL), inherits = TRUE),
  is.data.frame,
  logical(1)
)
if (!all(DataLoaded)) {
  stop(
    "Expected data frame(s) not found after loading the data files: ",
    paste(RequiredData[!DataLoaded], collapse = ", "),
    call. = FALSE
  )
}
rm(RequiredData, DataLoaded)


# prepare positions data
# Result: one row per `legal_issue_id` with one `pos_*` column per actor that
# holds the `national_autonomy` coding of the position in which that actor is
# named as primary actor (NA if the actor took no position on the issue).

# Lookup table: output column -> regular expression that identifies the actor
# in `list_actors_primary`. The order of the entries fixes the column order of
# `Positions`; later steps rely on the member states forming the contiguous
# range `pos_at:pos_ro`.
ActorPatterns <- c(
  pos_cjeu = "Court of Justice of the European Union \\(CJEU\\)",
  pos_com = "European Commission",
  pos_ag = "Advocate General",
  pos_ep = "European Parliament \\(EP\\)",
  pos_council = "Council of the European Union",
  pos_at = "Austria",
  pos_be = "Belgium",
  pos_dk = "Denmark",
  pos_de = "Germany",
  pos_el = "Greece",
  pos_es = "Spain",
  pos_fr = "France",
  pos_hu = "Hungary",
  pos_ie = "Ireland",
  pos_it = "Italy",
  pos_lu = "Luxembourg",
  pos_nl = "Netherlands",
  pos_pt = "Portugal",
  pos_fi = "Finland",
  pos_se = "Sweden",
  pos_uk = "United Kingdom",
  pos_ee = "Estonia",
  pos_lv = "Latvia",
  pos_lt = "Lithuania",
  pos_pl = "Poland",
  pos_cz = "Czechia",
  pos_sk = "Slovakia",
  pos_si = "Slovenia",
  pos_cy = "Cyprus",
  pos_mt = "Malta",
  pos_bg = "Bulgaria",
  pos_ro = "Romania"
)

Positions <- positions %>%
  select(
    legal_issue_id, national_autonomy, list_actors_primary
  ) %>%
  # one column per actor: the coding of the row if the actor is listed as
  # primary actor, NA otherwise (the unnamed tibble is spliced into the data)
  mutate(
    as_tibble(lapply(
      ActorPatterns,
      function(pattern) {
        ifelse(str_detect(list_actors_primary, pattern), national_autonomy, NA)
      }
    ))
  ) %>%
  # spread every actor's coding over all rows of the same legal issue
  group_by(legal_issue_id) %>%
  fill(starts_with("pos_"), .direction = "downup") %>%
  # 35 double codings of primary positions -> use first entry
  mutate(across(starts_with("pos_"), first)) %>%
  ungroup() %>%
  select(-national_autonomy, -list_actors_primary, -pos_ep, -pos_council) %>%
  relocate(legal_issue_id) %>%
  # all rows of an issue are identical now -> keep one row per issue
  distinct()
rm(ActorPatterns)


# prepare issues data
# (keep the columns from `iuropa_decision_id` to `legal_issue` and from
# `about_derogation` to `national_law_primary`; the issue id is moved to the
# front)
Issues <- relocate(
  select(
    issues,
    iuropa_decision_id:legal_issue,
    about_derogation:national_law_primary
  ),
  legal_issue_id
)

# merge issues and positions
# (left join: all issues are kept, positions are added where available)
MergedIssPos <- Issues %>%
  left_join(Positions, by = "legal_issue_id")
rm(Issues, issues, positions, Positions)


# prepare decision data
# (identifiers, proceeding name and dates; `case_year` is stored under the
# name `year_lodged`)
Decisions <- decisions %>%
  select(
    iuropa_decision_id,
    iuropa_case_id,
    iuropa_proceeding_id, proceeding_name,
    year_lodged = case_year,
    decision_date
  )

# merge decision with issues + positions
# (the decision columns are placed directly after `cjeu_decision_id`)
MergedIssPosDec <- MergedIssPos %>%
  left_join(Decisions, by = "iuropa_decision_id") %>%
  relocate(iuropa_case_id:decision_date, .after = cjeu_decision_id)
rm(Decisions, decisions, MergedIssPos)

# prepare judgments data
# (proceeding date and duration, number of judges, judge-rapporteur)
Judgments <- select(
  judgments,
  iuropa_decision_id,
  proceeding_date, duration_days,
  count_judges,
  iuropa_judge_rapporteur_id, judge_rapporteur
)

# merge judgments with issues + positions + decisions
# (judge columns go after `national_law_primary`, the proceeding date and
# duration after `year_lodged`)
MergedIssPosDecJudg <- MergedIssPosDec %>%
  left_join(Judgments, by = "iuropa_decision_id") %>%
  relocate(count_judges:judge_rapporteur, .after = national_law_primary) %>%
  relocate(proceeding_date:duration_days, .after = year_lodged)
rm(Judgments, judgments, MergedIssPosDec)

# prepare Schroeder replication data
# (`df` is the object restored from the replication file)
RelData <- df %>%
  select(
    cjeu_decision_id,
    # case complexity
    count_subject_keywords,
    # ag
    list_advocates_generals, ms_advocates_generals,
    # number of previous interpretations
    nth_interpretation
  ) %>%
  # drop duplicated rows
  group_by(cjeu_decision_id) %>%
  distinct() %>%
  ungroup() %>%
  # exclude one decision id (rows with a missing id are dropped as well)
  filter(cjeu_decision_id != "ECLI:EU:C:2009:375:20100415")


# merge CJEU Database with Schroeder replication file
# (the replication variables are placed after `judge_rapporteur`)
FinalData <- MergedIssPosDecJudg %>%
  left_join(RelData, by = "cjeu_decision_id") %>%
  relocate(
    count_subject_keywords:nth_interpretation,
    .after = judge_rapporteur
  )
rm(df, RelData, MergedIssPosDecJudg)


# final data adjustments
# Derives the analysis variables from `FinalData` in a single pipeline:
# counts and weighted sums of member-state (ms) positions, the indicator for
# an observation from the AG's home state, the rescaled number of judges, the
# recoded Court position, the supranational signal (Commission + AG) and
# standardized versions of the numeric covariates.
FinalDataPrep <- FinalData %>%
  # pick first AG if multiple listed (only 7 cases)
  # NOTE(review): `ms_advocates_generals` is not shortened in the same way;
  # confirm it names a single state in these cases, otherwise the home-state
  # lookup further down cannot match.
  mutate(list_advocates_generals = str_remove(list_advocates_generals, ";.*$")) %>%
  rename(
    ag = list_advocates_generals,
    ms_ag = ms_advocates_generals
  ) %>%
  # add number of ms observations per issue (direction)
  # (row totals over the ms columns; rowSums() replaces the slow
  # rowwise()/c_across() sums and gives the same counts)
  mutate(
    sum_obs_ms = rowSums(!is.na(pick(pos_at:pos_ro))),
    sum_ms_ps = rowSums(
      across(pos_at:pos_ro, ~ str_detect(.x, "would not restrict autonomy")),
      na.rm = TRUE
    ),
    sum_ms_me = rowSums(
      across(pos_at:pos_ro, ~ str_detect(.x, "would restrict autonomy")),
      na.rm = TRUE
    ),
    sum_ms_compet = rowSums(
      across(pos_at:pos_ro, ~ str_detect(.x, "competing effect on autonomy")),
      na.rm = TRUE
    ),
    sum_ms_not_applic = rowSums(
      across(pos_at:pos_ro, ~ str_detect(.x, "not applicable")),
      na.rm = TRUE
    )
  ) %>%
  # add weighted ms observations + "net" share of ms preferences
  # (each state taking the respective position contributes its weight from
  # the table matching the year lodged: 1995 table before 2004, 2004 table
  # for 2004-2006, 2007 table from 2007 on; the row of the weight table is
  # matched on the `pos_*` column name, the weight is read from column 2;
  # states without that position contribute nothing)
  mutate(
    weighted_ms_ps = rowSums(
      across(pos_at:pos_ro, ~ case_when(
        .x == "would not restrict autonomy" & year_lodged < 2004 ~
          CountryWeights95[CountryWeights95$country == cur_column(), 2],
        .x == "would not restrict autonomy" &
          year_lodged >= 2004 & year_lodged < 2007 ~
          CountryWeights04[CountryWeights04$country == cur_column(), 2],
        .x == "would not restrict autonomy" & year_lodged >= 2007 ~
          CountryWeights07[CountryWeights07$country == cur_column(), 2],
        .default = NA_real_
      )),
      na.rm = TRUE
    ),
    weighted_ms_me = rowSums(
      across(pos_at:pos_ro, ~ case_when(
        .x == "would restrict autonomy" & year_lodged < 2004 ~
          CountryWeights95[CountryWeights95$country == cur_column(), 2],
        .x == "would restrict autonomy" &
          year_lodged >= 2004 & year_lodged < 2007 ~
          CountryWeights04[CountryWeights04$country == cur_column(), 2],
        .x == "would restrict autonomy" & year_lodged >= 2007 ~
          CountryWeights07[CountryWeights07$country == cur_column(), 2],
        .default = NA_real_
      )),
      na.rm = TRUE
    ),
    net_ms_pref = weighted_ms_me - weighted_ms_ps,
    # add information if observation from home state of AG
    # NOTE(review): only 17 of the 27 member states are mapped here; an AG
    # from any other state yields NA and therefore ms_obs_ag = 0 -- confirm
    # that no such AG occurs in the data.
    pos_hs_ag = case_when(
      ms_ag == "Austria" ~ pos_at,
      ms_ag == "Belgium" ~ pos_be,
      ms_ag == "Denmark" ~ pos_dk,
      ms_ag == "Germany" ~ pos_de,
      ms_ag == "Greece" ~ pos_el,
      ms_ag == "Spain" ~ pos_es,
      ms_ag == "France" ~ pos_fr,
      ms_ag == "Ireland" ~ pos_ie,
      ms_ag == "Italy" ~ pos_it,
      ms_ag == "Luxembourg" ~ pos_lu,
      ms_ag == "Netherlands" ~ pos_nl,
      ms_ag == "Portugal" ~ pos_pt,
      ms_ag == "Finland" ~ pos_fi,
      ms_ag == "Sweden" ~ pos_se,
      ms_ag == "United Kingdom" ~ pos_uk,
      ms_ag == "Slovakia" ~ pos_sk,
      ms_ag == "Slovenia" ~ pos_si,
      .default = NA_character_
    ),
    ms_obs_ag = ifelse(!is.na(pos_hs_ag), 1, 0)
  ) %>%
  # the individual ms positions are no longer needed
  select(-(pos_at:pos_ro), -pos_hs_ag) %>%
  # add whether no AG opinion required
  # (discretion of Court)
  mutate(ag_nosig = ifelse(is.na(pos_ag), 1, 0)) %>%
  # drop 63 cases in DV with coding "uncertain" + 31 missing cases in DV
  # (the comparison is NA for a missing DV, so filter() drops those rows too)
  filter(pos_cjeu != "uncertain") %>%
  # scale number of judges by maximum
  # (enlargements in 2004 and 2007)
  mutate(share_judges = case_when(
    year_lodged < 2004 ~ round(count_judges / 15, 5),
    year_lodged >= 2004 & year_lodged < 2007 ~ round(count_judges / 25, 5),
    year_lodged >= 2007 ~ round(count_judges / 27, 5),
    .default = NA_real_
  )) %>%
  relocate(share_judges, .after = count_judges) %>%
  mutate(
    # create ms signal variable for intensity
    # (location + intensity)
    ms_signal_int = sum_ms_me + sum_ms_ps,
    # add LN coding of ambivalent outcome
    # (not applicable + competing effect)
    cjeu_not_applic = ifelse(pos_cjeu == "not applicable", 1, 0),
    pos_cjeu_ln = case_when(
      pos_cjeu == "would restrict autonomy" ~ "ME",
      pos_cjeu == "would not restrict autonomy" ~ "PS",
      pos_cjeu == "not applicable" |
        pos_cjeu == "competing effect on autonomy" ~ "Ambi",
      .default = NA_character_
    )
  ) %>%
  relocate(pos_cjeu_ln, .after = pos_cjeu) %>%
  # create supranational signal
  # (first create helper dummies; the *_me, *_ps and *_ambi dummies are NA
  # when the actor took no position, which is captured by the *_nosig dummies)
  mutate(
    com_me = ifelse(pos_com == "would restrict autonomy", 1, 0),
    com_ps = ifelse(pos_com == "would not restrict autonomy", 1, 0),
    com_ambi = ifelse(com_me == 0 & com_ps == 0, 1, 0),
    com_nosig = ifelse(is.na(pos_com), 1, 0),
    ag_me = ifelse(pos_ag == "would restrict autonomy", 1, 0),
    ag_ps = ifelse(pos_ag == "would not restrict autonomy", 1, 0),
    ag_ambi = ifelse(ag_me == 0 & ag_ps == 0, 1, 0)
  ) %>%
  mutate(
    # case_when() treats an NA condition as not met, so the NA dummies of an
    # absent actor fall through to the branches that test the *_nosig dummy
    supra_signal = case_when(
      com_me == 1 & ag_me == 1 ~ "Strong ME",
      com_ps == 1 & ag_ps == 1 ~ "Strong PS",
      (com_me == 1 & (ag_ambi == 1 | ag_nosig == 1)) |
        ((com_ambi == 1 | com_nosig == 1) & ag_me == 1) ~ "Weak ME",
      (com_ps == 1 & (ag_ambi == 1 | ag_nosig == 1)) |
        ((com_ambi == 1 | com_nosig == 1) & ag_ps == 1) ~ "Weak PS",
      (com_ambi == 1 | com_nosig == 1) & (ag_ambi == 1 | ag_nosig == 1) ~ "Uninfo",
      (com_me == 1 & ag_ps == 1) | (com_ps == 1 & ag_me == 1) ~ "Cont",
      .default = NA_character_
    ),
    # variant that treats contradicting signals as uninformative
    supra_signal_mono = ifelse(supra_signal == "Cont", "Uninfo", supra_signal),
    supra_signal_int = case_when(
      supra_signal == "Uninfo" ~ "No Intensity",
      supra_signal == "Weak ME" | supra_signal == "Weak PS" ~ "Weak Intensity",
      supra_signal %in% c("Strong ME", "Strong PS", "Cont") ~ "Strong Intensity",
      .default = NA_character_
    )
  ) %>%
  # drop the helper dummies (`ag_nosig` lies outside this range and is kept)
  select(-(com_me:ag_ambi)) %>%
  mutate(
    # flag issues that Court, AG, Commission and all observing ms code as
    # "not applicable"
    # NOTE(review): the flag is NA rather than 0 when `pos_ag` or `pos_com` is
    # missing and all remaining conditions hold -- confirm this is intended.
    all_not_applic = ifelse(
      cjeu_not_applic == 1 &
        pos_ag == "not applicable" & pos_com == "not applicable" &
        sum_ms_me == 0 & sum_ms_ps == 0 & sum_ms_compet == 0 &
        sum_ms_not_applic > 0,
      1, 0
    )
  ) %>%
  select(-sum_ms_compet, -sum_ms_not_applic) %>%
  # standardize some variables
  # (easier for defining priors)
  # na.rm = TRUE: a single missing value (e.g. an unknown number of judges)
  # must not turn the whole standardized column into NA
  mutate(across(
    c(share_judges, sum_ms_ps, sum_ms_me, net_ms_pref, ms_signal_int),
    ~ (.x - mean(.x, na.rm = TRUE)) / sd(.x, na.rm = TRUE),
    .names = "{.col}_std"
  ))



# save the data
# (write the prepared data set as a csv file)
readr::write_csv(FinalDataPrep, "Court/Data/PreparedCourtData.csv")
